# 1. Install pyspark: pip install pyspark

from pyspark.sql import SparkSession
from pyspark.sql import DataFrame

# Build a Spark session on the "local" master with the application name
# "spark"; getOrCreate() hands back an already-active session if one exists.
session_builder = SparkSession.builder
session_builder = session_builder.master("local")
session_builder = session_builder.appName("spark")
spark = session_builder.getOrCreate()

# Load the student records using the JSON data source.
json_reader = spark.read.format("json")
student_df = json_reader.load("../data/students.json")

# Count the number of rows (students) in each "clazz" group.
students_by_clazz = student_df.groupBy("clazz")
clazz_num: DataFrame = students_by_clazz.count()

# Persist the per-class counts as CSV; "overwrite" mode replaces any output
# that already exists at the target path.
csv_writer = clazz_num.write.format("csv")
csv_writer = csv_writer.mode("overwrite")
csv_writer.save("../data/clazz_num")
